install.packages(c("DHARMa",
"data.table",
"lme4",
"Matrix",
"TMB",
"glmmTMB",
"mgcv",
"mgcViz",
"gratia",
"marginaleffects",
"ggplot2",
"colorspace",
"sf"))Script: Validation de modèles de régression en R
Version 1
1 Préparation
1.1 Paquets et options
library(DHARMa)
library(data.table)
library(lme4)
library(glmmTMB)
library(mgcv)
library(marginaleffects)
library(ggplot2)
library(colorspace)
library(sf)
options(show.signif.stars = FALSE)1.2 Thème pour ggplot2
base.size <- 11
plot_theme <-
theme_light(base_size = base.size) +
theme(plot.title = element_text(hjust = 0,
face = "bold",
margin = margin(l = 0, b = base.size/3, t = base.size/3)),
plot.tag = element_text(face = "bold"),
axis.line.x = element_line(color = "black",
linewidth = rel(0.5)),
axis.line.y = element_line(color = "black",
linewidth = rel(0.5)),
axis.title.x = element_text(margin = margin(t = base.size/2)),
axis.title.y = element_text(margin = margin(r = base.size/2)),
legend.position = "right",
legend.justification = "top",
legend.key.size = unit(base.size, "pt"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
plot.margin = margin(3, 3, 3, 3),
strip.text = element_text(size = rel(0.8),
hjust = 0.5,
color = "black",
margin = margin(base.size/2,
base.size/2,
base.size/2,
base.size/2)),
strip.background = element_rect(fill = "gray90", colour = NA))
theme_set(plot_theme)2 Exemple 1: Croissance de’l Épinette de Norvège dans les Alpes
2.1 Aperçu
2.1.1 Variables
quality: indice de qualité de site, de 1 (le meilleur) à 5 (le pire) ;site: identité du site ;tree: identité de l’arbre dans le site ;age.base: âge de l’arbre déterminé au niveau du sol (années) ;height: hauteur de l’arbre (m) ;dbh.cm: diamètre de l’arbre à hauteur de poitrine (cm) ;age.bh: âge de l’arbre à hauteur de poitrine (années) ;volume: volume de l’arbre (10-3 m3) ;tree.id: identité unique de l’arbre.
2.1.2 Références
Guttenberg, A. R. von. (1915). Wachstum und Ertrag der Fichte im Hochgebirge. Franz Deuticke. https://doi.org/10.5962/bhl.title.15664
Zeide, B. (1993). Analysis of Growth Equations. Forest Science, 39(3), 594–616. https://doi.org/10.1093/forestscience/39.3.594
Robinson, A. P., & Hamann, J. D. (2011). Forest analytics with R: An introduction. Springer.
2.2 Exploration des données
gutten <- readRDS("gutten.rds")gutten quality site tree age.base height dbh.cm volume age.bh tree.id
<fctr> <fctr> <int> <int> <num> <num> <num> <num> <fctr>
1: 1 1 1 20 4.2 4.6 5 9.67 1.1
2: 1 1 1 30 9.3 10.2 38 19.67 1.1
3: 1 1 1 40 14.9 14.9 123 29.67 1.1
4: 1 1 1 50 19.7 18.3 263 39.67 1.1
5: 1 1 1 60 23.0 20.7 400 49.67 1.1
---
1196: 5 7 21 110 10.3 17.1 104 90.00 7.21
1197: 5 7 21 120 10.9 18.2 123 100.00 7.21
1198: 5 7 21 130 11.5 19.2 144 110.00 7.21
1199: 5 7 21 140 12.2 20.2 166 120.00 7.21
1200: 5 7 21 150 12.9 21.1 190 130.00 7.21
ggplot(gutten) +
geom_point(aes(x = age.base, y = volume, colour = quality),
alpha = 0.5) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")2.3 Assumer une distribution normale pour la réponse
mod <- glm(volume ~ age.base * quality,
family = gaussian(link = "identity"),
data = gutten)
summary(mod)
Call:
glm(formula = volume ~ age.base * quality, family = gaussian(link = "identity"),
data = gutten)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -552.5377 29.7576 -18.568 < 2e-16
age.base 22.0000 0.3755 58.588 < 2e-16
quality2 185.8486 38.0030 4.890 1.14e-06
quality3 231.9053 43.4972 5.331 1.16e-07
quality4 367.7007 43.2035 8.511 < 2e-16
quality5 377.6463 64.5681 5.849 6.39e-09
age.base:quality2 -8.2937 0.4915 -16.875 < 2e-16
age.base:quality3 -12.8247 0.5261 -24.376 < 2e-16
age.base:quality4 -16.6270 0.5196 -31.999 < 2e-16
age.base:quality5 -18.1689 0.7255 -25.043 < 2e-16
(Dispersion parameter for gaussian family taken to be 46920)
Null deviance: 431202073 on 1199 degrees of freedom
Residual deviance: 55834794 on 1190 degrees of freedom
AIC: 16325
Number of Fisher Scoring iterations: 2
age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid)
mod.pred
age.base quality Estimate Pr(>|z|) S 2.5 % 97.5 %
10 1 -333 < 0.001 117.4 -385 -280.6
10 2 -230 < 0.001 90.9 -271 -188.7
10 3 -229 < 0.001 50.0 -285 -173.1
10 4 -131 < 0.001 18.3 -186 -76.0
10 5 -137 0.00818 6.9 -238 -35.4
--- 490 rows omitted. See ?avg_predictions and ?print.marginaleffects ---
150 1 2747 < 0.001 Inf 2682 2812.9
150 2 1689 < 0.001 Inf 1632 1746.0
150 3 1056 < 0.001 888.3 997 1114.8
150 4 621 < 0.001 336.5 564 677.9
150 5 400 < 0.001 57.7 309 490.2
Columns: rowid, estimate, p.value, s.value, conf.low, conf.high, age.base, quality, volume
Type: invlink(link)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.2) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")2.3.1 Résidus bruts
mod.res <- residuals(mod)
mod.fit <- fitted(mod)
ggplot() +
geom_hline(yintercept = 0, colour = 2) +
geom_point(aes(y = mod.res, x = mod.fit), alpha = 0.3) +
labs(x = "Fitted", y = "Residual")
ggplot() +
geom_hline(yintercept = 0, colour = 2) +
geom_point(aes(y = mod.res, x = rank(mod.fit)), alpha = 0.3) +
labs(x = "Fitted (rank)", y = "Residual")2.3.2 Résidus quantiles avec DHARMa
mod.qres <- simulateResiduals(mod)
plot(mod.qres)?simulateResiduals()testUniformity(mod.qres)
Asymptotic one-sample Kolmogorov-Smirnov test
data: simulationOutput$scaledResiduals
D = 0.11783, p-value = 6.744e-15
alternative hypothesis: two-sided
testDispersion(mod.qres)
DHARMa nonparametric dispersion test via sd of residuals fitted vs.
simulated
data: simulationOutput
dispersion = 0.99014, p-value = 0.792
alternative hypothesis: two.sided
testOutliers(mod.qres)
DHARMa outlier test based on exact binomial test with approximate
expectations
data: mod.qres
outliers at both margin(s) = 39, observations = 1200, p-value =
5.506e-13
alternative hypothesis: true probability of success is not equal to 0.007968127
95 percent confidence interval:
0.02321068 0.04416257
sample estimates:
frequency of outliers (expected: 0.00796812749003984 )
0.0325
testQuantiles(mod.qres)qu = 0.75, log(sigma) = -2.38786 : outer Newton did not converge fully.
qu = 0.75, log(sigma) = -2.185603 : outer Newton did not converge fully.
qu = 0.75, log(sigma) = -2.342839 : outer Newton did not converge fully.
Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
: Fitting terminated with step failure - check results carefully
Test for location of quantiles via qgam
data: simulationOutput
p-value < 2.2e-16
alternative hypothesis: both
2.4 Distribution Gamma
mod <- glm(volume ~ age.base * quality,
family = Gamma(link = "log"),
data = gutten)
summary(mod)
Call:
glm(formula = volume ~ age.base * quality, family = Gamma(link = "log"),
data = gutten)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.703001 0.086752 42.685 < 2e-16
age.base 0.037353 0.001095 34.121 < 2e-16
quality2 -0.639137 0.110790 -5.769 1.02e-08
quality3 -1.360492 0.126807 -10.729 < 2e-16
quality4 -1.624960 0.125951 -12.902 < 2e-16
quality5 -2.652857 0.188235 -14.093 < 2e-16
age.base:quality2 0.002591 0.001433 1.808 0.0708
age.base:quality3 0.001073 0.001534 0.700 0.4843
age.base:quality4 -0.001586 0.001515 -1.047 0.2955
age.base:quality5 0.001463 0.002115 0.692 0.4892
(Dispersion parameter for Gamma family taken to be 0.39877)
Null deviance: 2571.84 on 1199 degrees of freedom
Residual deviance: 765.82 on 1190 degrees of freedom
AIC: 15342
Number of Fisher Scoring iterations: 13
age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.2) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")mod.qres <- simulateResiduals(mod)
plot(mod.qres)plotResiduals(mod.qres, gutten$quality)
plotResiduals(mod.qres, gutten$age.base)2.5 Modifier l’effet d’une variable explicative
mod <- glm(volume ~ poly(age.base, 2) * quality,
family = Gamma(link = "log"),
data = gutten)
summary(mod)
Call:
glm(formula = volume ~ poly(age.base, 2) * quality, family = Gamma(link = "log"),
data = gutten)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.26742 0.03486 179.790 < 2e-16
poly(age.base, 2)1 50.69957 1.19719 42.349 < 2e-16
poly(age.base, 2)2 -26.97356 1.18594 -22.744 < 2e-16
quality2 -0.49023 0.04460 -10.991 < 2e-16
quality3 -1.28486 0.04876 -26.351 < 2e-16
quality4 -1.72206 0.04817 -35.746 < 2e-16
quality5 -2.56887 0.06830 -37.614 < 2e-16
poly(age.base, 2)1:quality2 -0.82340 1.57589 -0.523 0.601420
poly(age.base, 2)2:quality2 1.97857 1.52572 1.297 0.194950
poly(age.base, 2)1:quality3 5.53873 1.67588 3.305 0.000978
poly(age.base, 2)2:quality3 5.41866 1.67995 3.225 0.001292
poly(age.base, 2)1:quality4 1.55628 1.65947 0.938 0.348528
poly(age.base, 2)2:quality4 7.80608 1.67303 4.666 3.42e-06
poly(age.base, 2)1:quality5 8.72773 2.49897 3.493 0.000496
poly(age.base, 2)2:quality5 10.42116 2.46085 4.235 2.46e-05
(Dispersion parameter for Gamma family taken to be 0.2776462)
Null deviance: 2571.84 on 1199 degrees of freedom
Residual deviance: 381.28 on 1185 degrees of freedom
AIC: 14452
Number of Fisher Scoring iterations: 9
mod.qres <- simulateResiduals(mod)
plot(mod.qres)plotResiduals(mod.qres, gutten$age.base)mod <- glm(volume ~ poly(age.base, 5) * quality,
family = Gamma(link = "log"),
data = gutten)
summary(mod)
Call:
glm(formula = volume ~ poly(age.base, 5) * quality, family = Gamma(link = "log"),
data = gutten)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.20860 0.02867 216.551 < 2e-16
poly(age.base, 5)1 51.19473 0.98984 51.720 < 2e-16
poly(age.base, 5)2 -26.84320 1.00572 -26.691 < 2e-16
poly(age.base, 5)3 13.82515 0.98549 14.029 < 2e-16
poly(age.base, 5)4 -6.30964 0.97364 -6.480 1.34e-10
poly(age.base, 5)5 2.69578 0.94513 2.852 0.00442
quality2 -0.49360 0.03667 -13.461 < 2e-16
quality3 -1.28684 0.04022 -31.999 < 2e-16
quality4 -1.73142 0.04017 -43.104 < 2e-16
quality5 -2.61724 0.07035 -37.205 < 2e-16
poly(age.base, 5)1:quality2 -0.20950 1.29846 -0.161 0.87185
poly(age.base, 5)2:quality2 1.44794 1.28864 1.124 0.26140
poly(age.base, 5)3:quality2 -0.92489 1.27812 -0.724 0.46944
poly(age.base, 5)4:quality2 -0.44217 1.26179 -0.350 0.72608
poly(age.base, 5)5:quality2 0.41290 1.22443 0.337 0.73601
poly(age.base, 5)1:quality3 7.23381 1.39516 5.185 2.54e-07
poly(age.base, 5)2:quality3 1.44103 1.44009 1.001 0.31720
poly(age.base, 5)3:quality3 -2.71265 1.46673 -1.849 0.06464
poly(age.base, 5)4:quality3 2.10325 1.49545 1.406 0.15986
poly(age.base, 5)5:quality3 -1.99908 1.45315 -1.376 0.16918
poly(age.base, 5)1:quality4 4.20869 1.41361 2.977 0.00297
poly(age.base, 5)2:quality4 2.72389 1.49950 1.817 0.06954
poly(age.base, 5)3:quality4 -2.67568 1.55730 -1.718 0.08603
poly(age.base, 5)4:quality4 1.33233 1.60824 0.828 0.40759
poly(age.base, 5)5:quality4 -0.67091 1.54830 -0.433 0.66486
poly(age.base, 5)1:quality5 13.57262 3.04558 4.456 9.13e-06
poly(age.base, 5)2:quality5 3.60431 3.40721 1.058 0.29034
poly(age.base, 5)3:quality5 -4.96514 3.42769 -1.449 0.14774
poly(age.base, 5)4:quality5 3.20346 3.13789 1.021 0.30752
poly(age.base, 5)5:quality5 -2.12406 2.46748 -0.861 0.38951
(Dispersion parameter for Gamma family taken to be 0.1860418)
Null deviance: 2571.84 on 1199 degrees of freedom
Residual deviance: 227.44 on 1170 degrees of freedom
AIC: 13837
Number of Fisher Scoring iterations: 5
mod.qres <- simulateResiduals(mod)
plot(mod.qres)plotResiduals(mod.qres, gutten$quality)
plotResiduals(mod.qres, gutten$age.base)age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.3) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")2.6 Modéliser la structure de dépendence (effets aléatoires)
mod <- glmmTMB(volume ~ poly(age.base, 5) * quality + (1 | site + tree.id),
family = Gamma(link = "log"),
data = gutten)
summary(mod) Family: Gamma ( log )
Formula: volume ~ poly(age.base, 5) * quality + (1 | site + tree.id)
Data: gutten
AIC BIC logLik deviance df.resid
13108.4 13276.4 -6521.2 13042.4 1167
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
site (Intercept) 3.484e-09 5.902e-05
tree.id (Intercept) 1.064e-01 3.262e-01
Number of obs: 1200, groups: site, 7; tree.id, 107
Dispersion estimate for Gamma family (sigma^2): 0.0811
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.15514 0.07399 83.18 < 2e-16
poly(age.base, 5)1 51.00401 0.70289 72.56 < 2e-16
poly(age.base, 5)2 -27.50198 0.68369 -40.23 < 2e-16
poly(age.base, 5)3 14.20001 0.67206 21.13 < 2e-16
poly(age.base, 5)4 -6.50287 0.65808 -9.88 < 2e-16
poly(age.base, 5)5 2.57972 0.63128 4.09 4.38e-05
quality2 -0.45603 0.09280 -4.91 8.93e-07
quality3 -1.27593 0.10559 -12.08 < 2e-16
quality4 -1.73582 0.10438 -16.63 < 2e-16
quality5 -2.71730 0.14347 -18.94 < 2e-16
poly(age.base, 5)1:quality2 2.32373 0.95120 2.44 0.01457
poly(age.base, 5)2:quality2 2.15805 0.88108 2.45 0.01431
poly(age.base, 5)3:quality2 -0.59881 0.87156 -0.69 0.49205
poly(age.base, 5)4:quality2 -0.48705 0.85472 -0.57 0.56879
poly(age.base, 5)5:quality2 0.61347 0.82079 0.75 0.45481
poly(age.base, 5)1:quality3 8.71378 0.99368 8.77 < 2e-16
poly(age.base, 5)2:quality3 1.99627 0.97097 2.06 0.03979
poly(age.base, 5)3:quality3 -3.16591 0.98965 -3.20 0.00138
poly(age.base, 5)4:quality3 2.25348 1.00329 2.25 0.02470
poly(age.base, 5)5:quality3 -1.98606 0.96927 -2.05 0.04046
poly(age.base, 5)1:quality4 5.64002 0.98924 5.70 1.19e-08
poly(age.base, 5)2:quality4 3.27944 1.01715 3.22 0.00126
poly(age.base, 5)3:quality4 -2.72104 1.05149 -2.59 0.00966
poly(age.base, 5)4:quality4 1.73744 1.07454 1.62 0.10590
poly(age.base, 5)5:quality4 -0.98233 1.02590 -0.96 0.33830
poly(age.base, 5)1:quality5 11.68135 2.06902 5.65 1.64e-08
poly(age.base, 5)2:quality5 4.84555 2.23938 2.16 0.03048
poly(age.base, 5)3:quality5 -5.76442 2.24826 -2.56 0.01035
poly(age.base, 5)4:quality5 2.89030 2.05543 1.41 0.15967
poly(age.base, 5)5:quality5 -1.75154 1.62157 -1.08 0.28007
mod.qres <- simulateResiduals(mod)
plot(mod.qres)age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid, re.form = NA)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.3) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")2.7 Distribution Tweedie
mod <- glmmTMB(volume ~ poly(age.base, 5) * quality + (1 | site + tree.id),
family = tweedie(link = "log"),
data = gutten)
summary(mod) Family: tweedie ( log )
Formula: volume ~ poly(age.base, 5) * quality + (1 | site + tree.id)
Data: gutten
AIC BIC logLik deviance df.resid
12172.7 12345.8 -6052.3 12104.7 1166
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
site (Intercept) 5.231e-08 0.0002287
tree.id (Intercept) 9.365e-02 0.3060257
Number of obs: 1200, groups: site, 7; tree.id, 107
Dispersion parameter for tweedie family (): 1.2
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.17116 0.06803 90.72 < 2e-16
poly(age.base, 5)1 50.33272 0.56916 88.43 < 2e-16
poly(age.base, 5)2 -26.54207 0.59619 -44.52 < 2e-16
poly(age.base, 5)3 13.27259 0.57811 22.96 < 2e-16
poly(age.base, 5)4 -5.56186 0.49005 -11.35 < 2e-16
poly(age.base, 5)5 1.72183 0.34078 5.05 4.36e-07
quality2 -0.47211 0.08537 -5.53 3.20e-08
quality3 -1.29152 0.09920 -13.02 < 2e-16
quality4 -1.73844 0.09871 -17.61 < 2e-16
quality5 -2.75756 0.15965 -17.27 < 2e-16
poly(age.base, 5)1:quality2 0.67902 0.77192 0.88 0.379047
poly(age.base, 5)2:quality2 2.53592 0.80363 3.16 0.001602
poly(age.base, 5)3:quality2 -0.96863 0.77633 -1.25 0.212141
poly(age.base, 5)4:quality2 0.20727 0.66499 0.31 0.755275
poly(age.base, 5)5:quality2 -0.21321 0.47357 -0.45 0.652556
poly(age.base, 5)1:quality3 7.90019 1.22076 6.47 9.70e-11
poly(age.base, 5)2:quality3 0.90795 1.30628 0.70 0.487014
poly(age.base, 5)3:quality3 -2.15437 1.24759 -1.73 0.084200
poly(age.base, 5)4:quality3 1.20976 1.01938 1.19 0.235323
poly(age.base, 5)5:quality3 -0.70757 0.62788 -1.13 0.259774
poly(age.base, 5)1:quality4 4.98122 1.31661 3.78 0.000155
poly(age.base, 5)2:quality4 3.31719 1.42924 2.32 0.020289
poly(age.base, 5)3:quality4 -2.84417 1.37715 -2.07 0.038899
poly(age.base, 5)4:quality4 1.60333 1.13232 1.42 0.156784
poly(age.base, 5)5:quality4 -0.65574 0.70052 -0.94 0.349234
poly(age.base, 5)1:quality5 13.45085 4.73068 2.84 0.004465
poly(age.base, 5)2:quality5 2.35564 5.10590 0.46 0.644544
poly(age.base, 5)3:quality5 -4.36104 4.61375 -0.95 0.344544
poly(age.base, 5)4:quality5 1.75873 3.33738 0.53 0.598207
poly(age.base, 5)5:quality5 -0.66066 1.61569 -0.41 0.682612
AIC(mod)[1] 12172.7
mod.qres <- simulateResiduals(mod)
plot(mod.qres)age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid, re.form = NA)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.3) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")2.8 Modéliser la dispersion directement
mod <- glmmTMB(volume ~ poly(age.base, 5) * quality + (1 | site + tree.id),
dispformula = ~ age.base * quality,
family = Gamma(link = "log"),
data = gutten)Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
NA/NaN function evaluation
summary(mod) Family: Gamma ( log )
Formula: volume ~ poly(age.base, 5) * quality + (1 | site + tree.id)
Dispersion: ~age.base * quality
Data: gutten
AIC BIC logLik deviance df.resid
12073.2 12287.0 -5994.6 11989.2 1158
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
site (Intercept) 0.0008954 0.02992
tree.id (Intercept) 0.0967602 0.31106
Number of obs: 1200, groups: site, 7; tree.id, 107
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.16236 0.07579 81.31 < 2e-16
poly(age.base, 5)1 50.79307 0.53874 94.28 < 2e-16
poly(age.base, 5)2 -27.04771 0.50800 -53.24 < 2e-16
poly(age.base, 5)3 13.72387 0.50697 27.07 < 2e-16
poly(age.base, 5)4 -5.69875 0.45568 -12.51 < 2e-16
poly(age.base, 5)5 1.52579 0.26165 5.83 5.50e-09
quality2 -0.46234 0.10880 -4.25 2.14e-05
quality3 -1.26978 0.11519 -11.02 < 2e-16
quality4 -1.71968 0.15076 -11.41 < 2e-16
quality5 -2.69278 0.15593 -17.27 < 2e-16
poly(age.base, 5)1:quality2 -0.09572 0.86160 -0.11 0.91154
poly(age.base, 5)2:quality2 2.40647 0.82348 2.92 0.00347
poly(age.base, 5)3:quality2 -0.98120 0.82159 -1.19 0.23237
poly(age.base, 5)4:quality2 0.20850 0.72358 0.29 0.77323
poly(age.base, 5)5:quality2 -0.02178 0.40122 -0.05 0.95671
poly(age.base, 5)1:quality3 6.75831 1.30912 5.16 2.44e-07
poly(age.base, 5)2:quality3 1.90020 1.33860 1.42 0.15574
poly(age.base, 5)3:quality3 -3.09653 1.25948 -2.46 0.01395
poly(age.base, 5)4:quality3 1.89532 0.95368 1.99 0.04688
poly(age.base, 5)5:quality3 -0.70906 0.43413 -1.63 0.10241
poly(age.base, 5)1:quality4 4.64536 0.99105 4.69 2.77e-06
poly(age.base, 5)2:quality4 3.06374 1.04892 2.92 0.00349
poly(age.base, 5)3:quality4 -2.73694 1.08272 -2.53 0.01148
poly(age.base, 5)4:quality4 1.38561 0.93736 1.48 0.13935
poly(age.base, 5)5:quality4 -0.36483 0.50884 -0.72 0.47338
poly(age.base, 5)1:quality5 10.30089 3.30954 3.11 0.00186
poly(age.base, 5)2:quality5 4.77682 3.61374 1.32 0.18622
poly(age.base, 5)3:quality5 -5.47487 3.37120 -1.62 0.10437
poly(age.base, 5)4:quality5 1.84243 2.48729 0.74 0.45885
poly(age.base, 5)5:quality5 -0.29990 1.12646 -0.27 0.79006
Dispersion model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.860216 0.194841 4.415 1.01e-05
age.base 0.044918 0.002567 17.498 < 2e-16
quality2 -0.999465 0.246783 -4.050 5.12e-05
quality3 -1.554165 0.292994 -5.304 1.13e-07
quality4 -0.189217 0.294010 -0.644 0.519852
quality5 -0.858843 0.489647 -1.754 0.079430
age.base:quality2 0.004986 0.003350 1.489 0.136595
age.base:quality3 0.014015 0.003761 3.727 0.000194
age.base:quality4 -0.008036 0.003720 -2.160 0.030775
age.base:quality5 -0.006616 0.005827 -1.135 0.256176
mod.qres <- simulateResiduals(mod)
plot(mod.qres)age.seq <- seq(min(gutten$age.base), max(gutten$age.base), length.out = 100)
quality.u <- unique(gutten$quality)
pred.grid <- datagrid(age.base = age.seq,
quality = quality.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid, re.form = NA)
ggplot(mod.pred) +
geom_point(data = gutten,
aes(x = age.base, y = volume, colour = quality),
alpha = 0.3) +
geom_line(aes(x = age.base, y = estimate, colour = quality)) +
geom_ribbon(aes(x = age.base, ymin = conf.low, ymax = conf.high,
group = quality),
alpha = 0.2) +
facet_wrap(vars(quality)) +
labs(y = "Volume (m³/1000)",
x = "Age (years)",
colour = "Site quality")3 Exemple 2: Effet de l’ozone sur les semis de l’épinette de Sitka
3.1 Aperçu
3.1.1 Variables
tree.id: identité de l’arbre (79 individus) ;day: nombre de jours depuis le 1er janvier 1988 ;size: taille de l’arbre (hauteur multipliée par le diamètre, 10-4 m3) ;treatment: indique si les arbres sont maintenus dans un environnement normal (control) ou enrichi (70 nl l-1) en ozone (ozone).
3.1.2 Références
Diggle, P., Heagerty, P., Liang, K.-Y., & Zeger, S. (2002). Analysis of Longitudinal Data (Second Edition). Oxford University Press.
Données recueillies par Dr Peter Lucas (Biological Sciences Division, Lancaster University).
3.2 Exploration des données
sitka <- readRDS("sitka.rds")sitka tree.id day treatment size
<fctr> <int> <fctr> <num>
1: 1 152 ozone 90.92182
2: 1 174 ozone 145.47438
3: 1 201 ozone 223.63159
4: 1 227 ozone 365.03747
5: 1 258 ozone 468.71739
---
944: 79 556 control 259.82284
945: 79 579 control 383.75334
946: 79 613 control 395.44037
947: 79 639 control 497.70125
948: 79 674 control 566.79631
ggplot(sitka) +
geom_line(data = sitka,
aes(x = day, y = size,
group = tree.id, colour = treatment),
alpha = 0.5) +
scale_colour_brewer(type = "qual", palette = "Set1") +
labs(x = "Time (days)",
y = "Size (cm²m)",
colour = "Treatment")3.3 Un prémier modèle
mod <- glmmTMB(size ~ day * treatment + (1 | tree.id),
data = sitka,
family = Gamma(link = "log"))Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
NA/NaN function evaluation
summary(mod) Family: Gamma ( log )
Formula: size ~ day * treatment + (1 | tree.id)
Data: sitka
AIC BIC logLik deviance df.resid
11185.8 11214.9 -5586.9 11173.8 942
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
tree.id (Intercept) 0.3663 0.6052
Number of obs: 948, groups: tree.id, 79
Dispersion estimate for Gamma family (sigma^2): 0.0917
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.387e+00 1.285e-01 34.14 < 2e-16
day 3.214e-03 9.311e-05 34.52 < 2e-16
treatmentozone -1.670e-01 1.554e-01 -1.08 0.28236
day:treatmentozone -3.200e-04 1.125e-04 -2.85 0.00443
day.seq <- seq(min(sitka$day), max(sitka$day), length.out = 100)
treatment.u <- unique(sitka$treatment)
pred.grid <- datagrid(day = day.seq,
treatment = treatment.u,
model = mod)
mod.pred <- predictions(mod, newdata = pred.grid, re.form = NA)
ggplot(mod.pred) +
geom_line(data = sitka, aes(x = day, y = size, group = tree.id),
alpha = 0.2) +
geom_ribbon(aes(x = day, ymin = conf.low, ymax = conf.high,
fill = treatment),
alpha = 0.2) +
geom_line(aes(x = day, y = estimate, colour = treatment)) +
scale_colour_brewer(type = "qual", palette = "Set1") +
scale_fill_brewer(type = "qual", palette = "Set1",
guide = "none") +
labs(x = "Time (days)",
y = "Size (cm²m)",
colour = "Treatment")mod.qres <- simulateResiduals(mod)
plot(mod.qres)mod.qres.time <- recalculateResiduals(mod.qres, group = sitka$day)
testTemporalAutocorrelation(residuals(mod.qres.time), unique(sitka$day))
Durbin-Watson test
data: simulationOutput$scaledResiduals ~ 1
DW = 0.96269, p-value = 0.04487
alternative hypothesis: true autocorrelation is not 0
3.4 Intégrer l’autocorrelation temporelle
3.4.1 Variation temporelle
mod <- gam(size ~ s(day) + treatment + s(tree.id, bs = "re"),
data = sitka,
family = Gamma(link = "log"),
method = "REML")
summary(mod)
Family: Gamma
Link function: log
Formula:
size ~ s(day) + treatment + s(tree.id, bs = "re")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.7074 0.1211 47.113 <2e-16
treatmentozone -0.2964 0.1465 -2.023 0.0434
Approximate significance of smooth terms:
edf Ref.df F p-value
s(day) 8.302 8.849 1681.3 <2e-16
s(tree.id) 76.515 77.000 164.1 <2e-16
R-sq.(adj) = 0.943 Deviance explained = 96.5%
-REML = 5095.8 Scale est. = 0.027704 n = 948
mod.qres <- simulateResiduals(mod)Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
Registered S3 method overwritten by 'mgcViz':
method from
+.gg GGally
plot(mod.qres)day.seq <- seq(min(sitka$day), max(sitka$day), length.out = 100)
treatment.u <- unique(sitka$treatment)
pred.grid <- datagrid(day = day.seq,
treatment = treatment.u,
model = mod)
mod.pred <- predictions(mod,
newdata = pred.grid,
exclude = "s(tree.id)")
ggplot(mod.pred) +
geom_line(data = sitka, aes(x = day, y = size, group = tree.id),
alpha = 0.2) +
geom_ribbon(aes(x = day, ymin = conf.low, ymax = conf.high,
fill = treatment),
alpha = 0.2) +
geom_line(aes(x = day, y = estimate, colour = treatment)) +
scale_colour_brewer(type = "qual", palette = "Set1") +
scale_fill_brewer(type = "qual", palette = "Set1",
guide = "none") +
labs(x = "Time (days)",
y = "Size (cm²m)",
colour = "Treatment")mod.qres.time <- recalculateResiduals(mod.qres, group = sitka$day)
testTemporalAutocorrelation(residuals(mod.qres.time), unique(sitka$day))
Durbin-Watson test
data: simulationOutput$scaledResiduals ~ 1
DW = 2.5432, p-value = 0.3221
alternative hypothesis: true autocorrelation is not 0
3.4.2 Variation temporelle par traitement
mod <- gam(size ~
s(day) +
s(day, treatment, bs = "sz") +
s(tree.id, bs = "re"),
data = sitka,
family = Gamma(link = "log"),
method = "REML")
summary(mod)
Family: Gamma
Link function: log
Formula:
size ~ s(day) + s(day, treatment, bs = "sz") + s(tree.id, bs = "re")
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.55830 0.07356 75.57 <2e-16
Approximate significance of smooth terms:
edf Ref.df F p-value
s(day) 8.332 8.860 1603.65 <2e-16
s(day,treatment) 3.697 4.318 11.91 <2e-16
s(tree.id) 76.541 77.000 172.70 <2e-16
R-sq.(adj) = 0.939 Deviance explained = 96.7%
-REML = 5080.6 Scale est. = 0.026457 n = 948
mod.qres <- simulateResiduals(mod)
plot(mod.qres)day.seq <- seq(min(sitka$day), max(sitka$day), length.out = 100)
treatment.u <- unique(sitka$treatment)
pred.grid <- datagrid(day = day.seq,
treatment = treatment.u,
model = mod)
mod.pred <- predictions(mod,
newdata = pred.grid,
exclude = "s(tree.id)")
ggplot(mod.pred) +
geom_line(data = sitka, aes(x = day, y = size, group = tree.id),
alpha = 0.2) +
geom_ribbon(aes(x = day, ymin = conf.low, ymax = conf.high,
fill = treatment),
alpha = 0.2) +
geom_line(aes(x = day, y = estimate, colour = treatment)) +
scale_colour_brewer(type = "qual", palette = "Set1") +
scale_fill_brewer(type = "qual", palette = "Set1",
guide = "none") +
labs(x = "Time (days)",
y = "Size (cm²m)",
colour = "Treatment")mod.qres.time <- recalculateResiduals(mod.qres, group = sitka$day)
testTemporalAutocorrelation(residuals(mod.qres.time), unique(sitka$day))
Durbin-Watson test
data: simulationOutput$scaledResiduals ~ 1
DW = 2.8269, p-value = 0.1212
alternative hypothesis: true autocorrelation is not 0
gratia::draw(mod)Registered S3 method overwritten by 'gratia':
method from
simulate.gam mgcViz
4 Exemple 3: Distribution des lichens en Suède
4.1 Aperçu
4.1.1 Variables
tree.id: identité unique de l’arbre ;- Informations sur l’inventaire :
ip(période d’inventaire,1ou2) ;region(région de l’inventaire) ;year(année de l’évaluation) ; east,north: coordonnées projetées dans la grille de référence suédoise (EPSG : 3006) ;species: genre du lichen (soitUsnea,Aleactoria, ouBryoria) ;occurrence: indique si des lichens du genre correspondant ont été trouvés sur l’arbre (1) ou pas (0) ;- Variables environnementales :
mat(proportion de forêts matures dans un rayon de 100 m) ;temp(température annuelle moyenne, °C) ;rain(cumul annuel de pluie, mm) ;ndep(dépôt annuel moyen d’azote, kg N ha-1 an-1) ; - Variables au niveau de l’arbre :
dbh(diamètre à hauteur de poitrine, mm) ;crl(limite du houppier, m) ; - Variables au niveau du peuplement :
bas(surface terrière, m2 ha-1),age(âge du peuplement) ; - Grille d’agrégation spatiale (20km×20km) :
east.agg,north.agg,rast.agg.id
4.1.2 Références
Esseen, Per-Anders et al. (2022), Multiple drivers of large‐scale lichen decline in boreal forest canopies, Dryad, Dataset, https://doi.org/10.5061/dryad.2ngf1vhq5
Esseen, P.-A., Ekström, M., Grafström, A., Jonsson, B. G., Palmqvist, K., Westerlund, B., & Ståhl, G. (2022). Multiple drivers of large-scale lichen decline in boreal forest canopies. Global Change Biology, 28(10), 3293–3309. https://doi.org/10.1111/gcb.16128
4.2 Exploration des données
lichen <- readRDS("lichen.rds")
swe <- st_read("adm_swe.gpkg")lichenKey: <tree.id>
tree.id ip region tract year east north species occurrence
<int> <fctr> <int> <int> <int> <int> <int> <fctr> <int>
1: 1 1 1 1525 1994 542300 7214700 Aleactoria 1
2: 1 1 1 1525 1994 542300 7214700 Usnea 0
3: 1 1 1 1525 1994 542300 7214700 Bryoria 1
4: 1 2 1 1525 2003 542300 7214700 Aleactoria 1
5: 1 2 1 1525 2003 542300 7214700 Usnea 0
---
25190: 6134 1 2 2575 2002 531100 7044600 Usnea 1
25191: 6134 1 2 2575 2002 531100 7044600 Bryoria 1
25192: 6135 1 2 2575 2002 531700 7044600 Aleactoria 0
25193: 6135 1 2 2575 2002 531700 7044600 Usnea 1
25194: 6135 1 2 2575 2002 531700 7044600 Bryoria 1
east.agg north.agg dbh crl bas age mat temp rain
<num> <num> <int> <num> <int> <int> <num> <num> <num>
1: 537310.6 7220972 187 2.5 11 255 20.8800 0.2900000 502.11
2: 537310.6 7220972 187 2.5 11 255 20.8800 0.2900000 502.11
3: 537310.6 7220972 187 2.5 11 255 20.8800 0.2900000 502.11
4: 537310.6 7220972 184 3.1 6 264 20.8800 0.7600001 472.68
5: 537310.6 7220972 184 3.1 6 264 20.8800 0.7600001 472.68
---
25190: 537310.6 7040972 345 1.8 33 110 44.6027 2.7000000 492.35
25191: 537310.6 7040972 345 1.8 33 110 44.6027 2.7000000 492.35
25192: 537310.6 7040972 363 3.5 24 112 81.6669 2.7000000 492.35
25193: 537310.6 7040972 363 3.5 24 112 81.6669 2.7000000 492.35
25194: 537310.6 7040972 363 3.5 24 112 81.6669 2.7000000 492.35
ndep rast.agg.id
<num> <int>
1: 3.77445 448
2: 3.77445 448
3: 3.77445 448
4: 2.48204 448
5: 2.48204 448
---
25190: 3.73894 439
25191: 3.73894 439
25192: 3.73894 439
25193: 3.73894 439
25194: 3.73894 439
lichen.usnea1 <- lichen[species == "Usnea" & ip == 1]plot_theme <-
plot_theme +
theme(panel.grid.major = element_line(linewidth = rel(1)))
theme_set(plot_theme)lichen.prop.agg <-
lichen[,
.(occurrence = mean(occurrence)),
by = .(ip, species, east.agg, north.agg)]
ggplot(lichen.prop.agg) +
geom_raster(aes(x = east.agg, y = north.agg, fill = occurrence)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
facet_grid(rows = vars(ip), cols = vars(species)) +
labs(x = NULL, y = NULL, fill = "Occurence probability")4.3 Modèle climatique
mod <-
glm(occurrence ~ temp * rain,
data = lichen.usnea1,
family = binomial(link = "logit"))
summary(mod)
Call:
glm(formula = occurrence ~ temp * rain, family = binomial(link = "logit"),
data = lichen.usnea1)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -8.9565336 0.5593428 -16.01 <2e-16
temp 1.6318860 0.0933065 17.49 <2e-16
rain 0.0185853 0.0011290 16.46 <2e-16
temp:rain -0.0034271 0.0001799 -19.05 <2e-16
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 5807.6 on 4320 degrees of freedom
Residual deviance: 5101.6 on 4317 degrees of freedom
AIC: 5109.6
Number of Fisher Scoring iterations: 5
mod.pred <- avg_predictions(mod, by = c("east.agg", "north.agg"))
ggplot(mod.pred) +
geom_raster(aes(x = east.agg, y = north.agg, fill = estimate)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
labs(x = NULL, y = NULL, fill = "Estimated occcurence\nprobability")mod.qres <- simulateResiduals(mod)
plot(mod.qres, quantreg = TRUE)4.3.1 Vérifier la dispersion du modèle logistique
mod.res.agg <- recalculateResiduals(mod.qres, lichen.usnea1$rast.agg.id)
plot(mod.res.agg, quantreg = TRUE)4.3.2 Vérifier l’autocorrélation spatiale résiduelle
testSpatialAutocorrelation(mod.qres, lichen.usnea1$east, lichen.usnea1$north)
DHARMa Moran's I test for distance-based autocorrelation
data: mod.qres
observed = 0.03284325, expected = -0.00023148, sd = 0.00196127, p-value
< 2.2e-16
alternative hypothesis: Distance-based autocorrelation
lichen.usnea1[, qres := residuals(mod.qres)]
lichen.usnea1[,
.(qres = sum(qres)/.N),
by = c("east.agg", "north.agg")] |>
ggplot() +
geom_raster(aes(x = east.agg, y = north.agg, fill = qres)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_continuous_diverging("Blue-Red", mid = 0.5) +
labs(x = NULL, y = NULL, fill = "Quantile residual")4.3.3 Vérifier les résidus en fonction des variables explicatives
plotResiduals(mod.qres, lichen.usnea1$temp, quantreg = TRUE)
plotResiduals(mod.qres, lichen.usnea1$rain, quantreg = TRUE)plotResiduals(mod.qres, lichen.usnea1$ndep, quantreg = TRUE)
plotResiduals(mod.qres, lichen.usnea1$mat, quantreg = TRUE)plotResiduals(mod.qres, lichen.usnea1$dbh, quantreg = TRUE)
plotResiduals(mod.qres, lichen.usnea1$crl, quantreg = TRUE)plotResiduals(mod.qres, lichen.usnea1$bas, quantreg = TRUE)
plotResiduals(mod.qres, lichen.usnea1$age, quantreg = TRUE)4.4 Modèle écologique
mod <-
glm(occurrence ~
temp * rain +
mat + ndep +
dbh + crl +
bas + age,
data = lichen.usnea1,
family = binomial(link = "logit"))
summary(mod)
Call:
glm(formula = occurrence ~ temp * rain + mat + ndep + dbh + crl +
bas + age, family = binomial(link = "logit"), data = lichen.usnea1)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -9.4077498 0.6266171 -15.014 < 2e-16
temp 1.4903783 0.1024332 14.550 < 2e-16
rain 0.0175950 0.0011502 15.297 < 2e-16
mat 0.0045056 0.0013538 3.328 0.000874
ndep -0.1739911 0.0449058 -3.875 0.000107
dbh 0.0017239 0.0004256 4.050 5.12e-05
crl 0.0672831 0.0153119 4.394 1.11e-05
bas 0.0080440 0.0046167 1.742 0.081442
age 0.0039210 0.0011404 3.438 0.000586
temp:rain -0.0029079 0.0002207 -13.178 < 2e-16
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 5807.6 on 4320 degrees of freedom
Residual deviance: 4969.6 on 4311 degrees of freedom
AIC: 4989.6
Number of Fisher Scoring iterations: 5
mod.pred <- avg_predictions(mod, by = c("east.agg", "north.agg"))
ggplot(mod.pred) +
geom_raster(aes(x = east.agg, y = north.agg, fill = estimate)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
labs(x = NULL, y = NULL, fill = "Estimated occcurence\nprobability")mod.qres <- simulateResiduals(mod)
plot(mod.qres, quantreg = TRUE)mod.res.agg <- recalculateResiduals(mod.qres, lichen.usnea1$rast.agg.id)
plot(mod.res.agg, quantreg = TRUE)testSpatialAutocorrelation(mod.qres, lichen.usnea1$east, lichen.usnea1$north)
DHARMa Moran's I test for distance-based autocorrelation
data: mod.qres
observed = 0.03248526, expected = -0.00023148, sd = 0.00196127, p-value
< 2.2e-16
alternative hypothesis: Distance-based autocorrelation
lichen.usnea1[, qres := residuals(mod.qres)]
lichen.usnea1[,
.(qres = mean(qres)),
by = c("east.agg", "north.agg")] |>
ggplot() +
geom_raster(aes(x = east.agg, y = north.agg, fill = qres)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_continuous_diverging("Blue-Red", mid = 0.5) +
labs(x = NULL, y = NULL, fill = "Quantile residual")4.5 Modèle écologique avec effet spatial
mod <-
gam(occurrence ~
s(east, north, k = 60) +
temp * rain +
mat + ndep +
dbh + crl +
bas + age,
data = lichen.usnea1,
family = binomial(link = "logit"),
method = "REML")
summary(mod)
Family: binomial
Link function: logit
Formula:
occurrence ~ s(east, north, k = 60) + temp * rain + mat + ndep +
dbh + crl + bas + age
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.8594430 1.5600123 -2.474 0.01336
temp 0.7561983 0.2868751 2.636 0.00839
rain 0.0027300 0.0026149 1.044 0.29647
mat 0.0085476 0.0015090 5.665 1.47e-08
ndep -0.0242272 0.0885919 -0.273 0.78449
dbh 0.0026412 0.0004582 5.765 8.17e-09
crl 0.0381449 0.0169523 2.250 0.02444
bas -0.0092874 0.0051352 -1.809 0.07052
age 0.0102769 0.0013007 7.901 2.77e-15
temp:rain -0.0011520 0.0004931 -2.336 0.01947
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(east,north) 38.88 48.67 482.4 <2e-16
R-sq.(adj) = 0.298 Deviance explained = 25.5%
-REML = 2276.2 Scale est. = 1 n = 4321
mod.pred <- avg_predictions(mod, by = c("east.agg", "north.agg"))
ggplot(mod.pred) +
geom_raster(aes(x = east.agg, y = north.agg, fill = estimate)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
labs(x = NULL, y = NULL, fill = "Estimated occcurence\nprobability")mod.qres <- simulateResiduals(mod)
plot(mod.qres, quantreg = TRUE)mod.res.agg <- recalculateResiduals(mod.qres, lichen.usnea1$rast.agg.id)
plot(mod.res.agg, quantreg = TRUE)testSpatialAutocorrelation(mod.qres, lichen.usnea1$east, lichen.usnea1$north)
DHARMa Moran's I test for distance-based autocorrelation
data: mod.qres
observed = 0.00684166, expected = -0.00023148, sd = 0.00196127, p-value
= 0.0003105
alternative hypothesis: Distance-based autocorrelation
lichen.usnea1[, qres := residuals(mod.qres)]
lichen.usnea1[,
.(qres = mean(qres)),
by = c("east.agg", "north.agg")] |>
ggplot() +
geom_raster(aes(x = east.agg, y = north.agg, fill = qres)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_continuous_diverging("Blue-Red", mid = 0.5) +
labs(x = NULL, y = NULL, fill = "Quantile residual")4.6 Modèle écologique spatial pour deux périodes d’inventaire
lichen.usnea <- lichen[species == "Usnea"]
mod <-
gam(occurrence ~
s(east, north, by = ip, k = 60) +
temp * rain +
mat + ndep +
dbh + crl +
bas + age,
data = lichen.usnea,
family = binomial(link = "logit"),
method = "REML")
summary(mod)
Family: binomial
Link function: logit
Formula:
occurrence ~ s(east, north, by = ip, k = 60) + temp * rain +
mat + ndep + dbh + crl + bas + age
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.7335716 1.1081664 -4.272 1.94e-05
temp 0.7334869 0.2064062 3.554 0.00038
rain 0.0019242 0.0018769 1.025 0.30527
mat 0.0100452 0.0011597 8.662 < 2e-16
ndep 0.1931172 0.0367254 5.258 1.45e-07
dbh 0.0022839 0.0003338 6.842 7.79e-12
crl 0.0187202 0.0119687 1.564 0.11779
bas -0.0128068 0.0037511 -3.414 0.00064
age 0.0087871 0.0009198 9.553 < 2e-16
temp:rain -0.0011245 0.0003510 -3.204 0.00136
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(east,north):ip1 39.66 49.20 587.8 <2e-16
s(east,north):ip2 39.49 48.91 608.4 <2e-16
R-sq.(adj) = 0.317 Deviance explained = 27.4%
-REML = 4191.7 Scale est. = 1 n = 8398
mod.pred <- avg_predictions(mod, by = c("east.agg", "north.agg", "ip"))
ggplot(mod.pred) +
geom_raster(aes(x = east.agg, y = north.agg, fill = estimate)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
facet_wrap(vars(ip)) +
labs(x = NULL, y = NULL, fill = "Estimated occcurence\nprobability")mod.qres <- simulateResiduals(mod)
plot(mod.qres, quantreg = TRUE)mod.res.agg <- recalculateResiduals(mod.qres, lichen.usnea$rast.agg.id)
plot(mod.res.agg, quantreg = TRUE)lichen.usnea[, qres := residuals(mod.qres)]
lichen.usnea[,
.(qres = mean(qres)),
by = c("ip", "east.agg", "north.agg")] |>
ggplot() +
geom_raster(aes(x = east.agg, y = north.agg, fill = qres)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_continuous_diverging("Blue-Red", mid = 0.5) +
facet_wrap(vars(ip)) +
labs(x = NULL, y = NULL, fill = "Quantile residual")4.7 Modèle écologique non-linéaire
mod <-
gam(occurrence ~
s(east, north, by = ip, k = 60) +
ti(temp, k = 5) +
ti(rain, k = 5) +
ti(temp, rain, k = c(5, 5)) +
s(mat) +
s(ndep) +
s(dbh) +
s(crl) +
s(bas) +
s(age),
data = lichen.usnea,
family = binomial(link = "logit"),
method = "REML",
select = TRUE,
optimizer = "efs")
summary(mod)
Family: binomial
Link function: logit
Formula:
occurrence ~ s(east, north, by = ip, k = 60) + ti(temp, k = 5) +
ti(rain, k = 5) + ti(temp, rain, k = c(5, 5)) + s(mat) +
s(ndep) + s(dbh) + s(crl) + s(bas) + s(age)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.74038 0.06828 -10.84 <2e-16
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(east,north):ip1 32.884 59 359.606 < 2e-16
s(east,north):ip2 35.073 59 409.928 < 2e-16
ti(temp) 0.140 4 0.134 0.29038
ti(rain) 0.956 4 21.759 < 2e-16
ti(temp,rain) 1.253 16 6.381 0.00263
s(mat) 1.067 9 43.395 < 2e-16
s(ndep) 4.391 9 34.870 < 2e-16
s(dbh) 1.944 9 22.355 3.12e-06
s(crl) 4.829 9 57.582 < 2e-16
s(bas) 2.713 9 44.003 < 2e-16
s(age) 6.767 9 210.506 < 2e-16
R-sq.(adj) = 0.342 Deviance explained = 29.9%
-REML = 4040.6 Scale est. = 1 n = 8398
mod.pred <- avg_predictions(mod, by = c("east.agg", "north.agg", "ip"))
ggplot(mod.pred) +
geom_raster(aes(x = east.agg, y = north.agg, fill = estimate)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_viridis_c() +
facet_wrap(vars(ip)) +
labs(x = NULL, y = NULL, fill = "Estimated occcurence\nprobability")mod.qres <- simulateResiduals(mod)
plot(mod.qres, quantreg = TRUE)mod.res.agg <- recalculateResiduals(mod.qres, lichen.usnea$rast.agg.id)
plot(mod.res.agg, quantreg = TRUE)lichen.usnea[, qres := residuals(mod.qres)]
lichen.usnea[,
.(qres = mean(qres)),
by = c("ip", "east.agg", "north.agg")] |>
ggplot() +
geom_raster(aes(x = east.agg, y = north.agg, fill = qres)) +
geom_sf(data = swe, fill = NA, colour = "black") +
scale_fill_continuous_diverging("Blue-Red", mid = 0.5) +
facet_wrap(vars(ip)) +
labs(x = NULL, y = NULL, fill = "Quantile residual")gratia::draw(mod, rug = FALSE, select = 1:2)gratia::draw(mod, rug = FALSE, select = 3:5)gratia::draw(mod, rug = FALSE, select = 6:7)gratia::draw(mod, rug = FALSE, select = 8:9)gratia::draw(mod, rug = FALSE, select = 10:11)5 Session
sessionInfo()R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Pop!_OS 22.04 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
locale:
[1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
time zone: America/Toronto
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] sf_1.0-16 colorspace_2.1-0 ggplot2_3.5.1
[4] marginaleffects_0.19.0 mgcv_1.9-1 nlme_3.1-164
[7] glmmTMB_1.1.9 lme4_1.1-35.3 Matrix_1.7-0
[10] data.table_1.15.4 DHARMa_0.4.6
loaded via a namespace (and not attached):
[1] Rdpack_2.6 DBI_1.2.2 gridExtra_2.3
[4] sandwich_3.1-0 rlang_1.1.3 magrittr_2.0.3
[7] multcomp_1.4-25 matrixStats_1.3.0 e1071_1.7-14
[10] compiler_4.4.0 vctrs_0.6.5 stringr_1.5.1
[13] pkgconfig_2.0.3 fastmap_1.1.1 backports_1.4.1
[16] gratia_0.9.0 labeling_0.4.3 utf8_1.2.4
[19] promises_1.3.0 rmarkdown_2.26 nloptr_2.0.3
[22] mgcViz_0.1.11 purrr_1.0.2 xfun_0.43
[25] jsonlite_1.8.8 later_1.3.2 parallel_4.4.0
[28] R6_2.5.1 gap.datasets_0.0.6 stringi_1.8.3
[31] qgam_1.3.4 RColorBrewer_1.1-3 GGally_2.2.1
[34] boot_1.3-30 lmtest_0.9-40 numDeriv_2016.8-1.1
[37] estimability_1.5 Rcpp_1.0.12 iterators_1.0.14
[40] knitr_1.46 zoo_1.8-12 mvnfast_0.2.8
[43] httpuv_1.6.15 splines_4.4.0 tidyselect_1.2.1
[46] yaml_2.3.8 viridis_0.6.5 doParallel_1.0.17
[49] TMB_1.9.11 codetools_0.2-20 miniUI_0.1.1.1
[52] lattice_0.22-6 tibble_3.2.1 plyr_1.8.9
[55] shiny_1.8.1.1 withr_3.0.0 coda_0.19-4.1
[58] evaluate_0.23 survival_3.6-4 isoband_0.2.7
[61] ggstats_0.6.0 units_0.8-5 proxy_0.4-27
[64] pillar_1.9.0 gap_1.5-3 KernSmooth_2.23-22
[67] checkmate_2.3.1 foreach_1.5.2 insight_0.19.10
[70] generics_0.1.3 munsell_0.5.1 scales_1.3.0
[73] minqa_1.2.6 xtable_1.8-4 gamm4_0.2-6
[76] class_7.3-22 glue_1.7.0 emmeans_1.10.1
[79] tools_4.4.0 mvtnorm_1.2-4 grid_4.4.0
[82] ape_5.8 ggokabeito_0.1.0 tidyr_1.3.1
[85] rbibutils_2.2.16 patchwork_1.2.0 cli_3.6.2
[88] fansi_1.0.6 viridisLite_0.4.2 dplyr_1.1.4
[91] gtable_0.3.5 digest_0.6.35 classInt_0.4-10
[94] TH.data_1.1-2 htmlwidgets_1.6.4 farver_2.1.1
[97] htmltools_0.5.8.1 lifecycle_1.0.4 mime_0.12
[100] MASS_7.3-60.2